from transformers import BertModel, BertTokenizer
import torch

if __name__ == "__main__":
    # Smoke-test script: load the tokenizer and the BERT weights from the same
    # checkpoint identifier, then show how a sample sentence maps to token ids.
    # NOTE(review): the identifier is presumably a local directory holding the
    # downloaded checkpoint -- confirm it exists relative to the working dir.
    pretrained_name = 'chinese_roberta_wwm_ext_pytorch'

    tokenizer = BertTokenizer.from_pretrained(pretrained_name)

    # Report the size of the tokenizer's vocabulary.
    g_vocab_size = tokenizer.vocab_size
    print(g_vocab_size)

    # The model is loaded here but not used further in this script.
    g_bert = BertModel.from_pretrained(pretrained_name)

    # Split the sample sentence into tokens, then look up each token's id.
    t = tokenizer.tokenize('你在干什么')
    token_ids = tokenizer.convert_tokens_to_ids(t)
    print(token_ids)